# Create the interim data folder for this pollutant (data/interim/<poll>)
# so the interim datasets produced below have somewhere to be saved.
# The path is built without a trailing slash and tested with dir.exists(),
# which checks specifically for a directory.
# recursive = TRUE also creates the parent data/interim folder when it is
# missing; without it dir.create() only warns and returns FALSE, and the
# save() call at the end of the script then fails.
interim_dir <- paste0("data/interim/", poll)
if (!dir.exists(interim_dir)) {
  dir.create(interim_dir, recursive = TRUE)
}

# Read annual concentration by monitor data -----------------

filenames_conc <- list.files("data/raw/aqs/annual_conc")
# In case there are other files in the folder, keep only the
# annual_conc*.zip archives
filenames_conc <- filenames_conc[grepl("^annual_conc.*zip$", filenames_conc)]

# Fail early with a clear message when nothing matches. Otherwise paste0()
# below would turn the empty vector into a single bogus path (the folder
# itself) and the script would die later with an unrelated error.
if (length(filenames_conc) == 0) {
  stop(
    "No annual_conc*.zip files found in data/raw/aqs/annual_conc",
    call. = FALSE
  )
}

# NOTE(review): the listing above uses a path relative to the working
# directory while the full paths here are built from `wd` (defined outside
# this section) -- this assumes `wd` is the working directory; confirm.
dir_conc <- paste0(wd, "/data/raw/aqs/annual_conc/", filenames_conc)

# One list slot per archive, filled in the same order as dir_conc
data_conc <- vector("list", length(dir_conc))
for (i in seq_along(dir_conc)) {
  # List the archive contents without extracting anything to disk
  zipFileInfo <- unzip(dir_conc[i], list = TRUE)
  # unz() takes a single member name, so each archive must hold one file
  if (nrow(zipFileInfo) != 1L) {
    stop(
      "Expected exactly one file inside ", dir_conc[i],
      " but found ", nrow(zipFileInfo),
      call. = FALSE
    )
  }
  # Read the CSV straight from the zip through a connection
  data_conc[[i]] <- read.csv(
    unz(dir_conc[i], as.character(zipFileInfo$Name)),
    stringsAsFactors = FALSE
  )
}

# Stack the per-archive tables held in data_conc into one combined table
data_conc_comb <- rbindlist(l = data_conc)

# Build the monitor-by-year table for pollutant `poll` and bind it to the
# variable named data_annual_<poll>.
#  - keep rows whose Required.Day.Count is at least 365 (monitors with a
#    daily sampling requirement), whose State.Code is not "80" or "CC"
#    (presumably the non-US codes in AQS -- confirm), and whose
#    Parameter.Code equals `poll`
#  - add `year` (a copy of Year) and GEOID10 = state code * 1000 + county code
#  - keep one row per distinct combination of State.Code, County.Code,
#    GEOID10, Site.Num, POC, Parameter.Code, year and Required.Day.Count
#    (all other columns are dropped), sorted by every remaining column
assign(
  paste("data_annual", poll, sep = "_"),
  data_conc_comb %>%
    dplyr::filter(
      Required.Day.Count >= 365,
      !(State.Code %in% c("80", "CC")),
      Parameter.Code == poll
    ) %>%
    dplyr::mutate(
      year = Year,
      GEOID10 = as.numeric(State.Code) * 1000 + County.Code
    ) %>%
    distinct(
      State.Code, County.Code, GEOID10, Site.Num,
      POC, Parameter.Code, year, Required.Day.Count
    ) %>%
    arrange_all()
)

# Write the data_annual_<poll> object to
# data/interim/<poll>/data_annual_<poll>.RData
save(
  list = paste("data_annual", poll, sep = "_"),
  file = paste0("data/interim/", poll, "/data_annual_", poll, ".RData")
)

